/* ==============================================================================
 PART 1C: Sample Restrictions
==============================================================================
Purpose:
Applies comprehensive sample restrictions to focus on deposit-taking commercial banks:
- Sample restrictions for deposit-taking commercial banks
- Exclusion of specialized bank types (credit card, trust, foreign, etc.)
- Minimum size requirements.
Furthermore, remove outliers likely due to reporting errors or extreme values.

Input:  Panel-cleaned Call Report data from Part 1B
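Output: Final cleaned dataset ready for analysis (call_reports_prepared.dta)
        Files saved by this script to $path_clean: call_reports_prebeta.dta,
        call_reports_forcostreg.dta, sample_2022q4.dta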

Last updated: Aug 9, 2025
==============================================================================*/

display "--- Starting Part 1C: Sample Restrictions ---" // Announce the start of Part 1C in the log

//==============================================================================
// Section 1: Sample Restrictions
//==============================================================================
// Narrows the sample to deposit-taking commercial banks with significant
// domestic deposits; specialized institutions and small banks are excluded.
// The manuscript motivates the sample choice.
// The data are a bank-quarter panel identified by rssdid and yq.

// 1.1. Keep only banks in operation right before the SVB episode (2022Q4)
// A bank keeps its entire history when at least one of its 2022Q4 rows has
// non-missing assets; every other bank is removed completely.
tempvar in_2022q4
bysort rssdid: egen byte `in_2022q4' = max(yq == tq(2022q4) & !missing(assets))
keep if `in_2022q4'
drop `in_2022q4'

//------------------------------------------------------------------------------
// 1.2. Identify and Exclude Specialized Bank Types
// Each indicator below is 1 for the listed RSSD IDs and 0 for every other bank.
// The indicators are only built here; the rows are removed in step 1.4.

// 1.2.1. Credit Card Banks
// Major credit card specialist banks
// Note: Includes Ally Bank, Capital One, Discover Bank, American Express, Comenity Bank
generate creditcard = inlist(rssdid, 112837, 3284070, 30810, 1394676, 2253891, 1391778)

// 1.2.2. Trust Banks
// Trust and custody banks
// Note: Includes Sumitomo, Deutsche Bank Trust, Wilmington Trust, Northern Trust, State Street, BNY Mellon
generate trust = inlist(rssdid, 925411, 2265456, 214807, 210434, 35301, 541101, 934329)

// 1.2.3. Foreign Banks
// Foreign bank subsidiaries operating in the US (ID list split over two inlist() calls)
// Note: Includes Santander, Rabobank, Scotiabank, Barclays, TD Bank, BMO, MUFG, Mizuho, etc.
generate foreign = inlist(rssdid, 816210, 877369, 716413, 2980209, 925411, 2121196, 75633, 212465, 722777, 940311) ///
	| inlist(rssdid, 1842065, 497404, 413208, 229913, 320119, 214807, 711472, 3402913, 2991239, 1919589, 304913)

// 1.2.4. Broker-Dealer Banks
// Investment bank subsidiaries
// Note: Includes Goldman Sachs Bank, Morgan Stanley Bank, Raymond James Bank
generate broker = inlist(rssdid, 2182786, 1456501, 2193616, 2489805)

// 1.2.5. Lending Club
// Lending Club bank, which changed status to commercial bank only in 2021
generate lendingclub = (rssdid == 264772)

// 1.3. Remove Duplicate Entities
// Several RSSD IDs share a parent company with another sample bank (or duplicate
// it); they are removed so that the same organization is not counted twice.
// Note: Removes duplicate BNY Mellon, Capital One, TD Bank, Morgan Stanley, Wells Fargo entities
keep if !inlist(rssdid, 934329, 2253891, 2121196, 2489805, 1225761, 2362458)

// Banks identified as having a larger bank in the same BHC
keep if !inlist(rssdid, 165806,  344816,  398668,  447874,  488318,  521804,  651448, 678717,  772446,  790543,  948036,  1211371, 1443266, 2265456, 2434113, 2713920, 2736291, 3029589, 3076220, 3347603, 3382547, 3465392, 3804535, 4125778)

// 1.4. Apply Bank Type Exclusions
// Remove every bank carrying one of the specialized-type indicators from 1.2
keep if !(creditcard == 1 | trust == 1 | foreign == 1 | broker == 1 | lendingclub==1)

// Safeguard: total assets should already be populated after the previous
// steps, but any remaining row without them is removed here
keep if !missing(assets)

// Order rows by bank and quarter ahead of the time-based filters that follow
sort rssdid yq

// Call reports for the beta calculation: written after the time-invariant
// sample restrictions and before the time-varying and outlier-related ones
save "$path_clean/call_reports_prebeta.dta", replace

// 1.5. Deposit-Taking Institution Filter
// Require total deposits (foreign + domestic) of at least 65% of assets.
// The filter works row by row, so only the failing bank-quarters are removed.
// NOTE(review): if foreigndep or domdep is missing the ratio is missing, and a
// missing ratio is never "< 0.65", so such rows are kept -- confirm that
// upstream steps fill these variables (e.g. foreigndep = 0 for domestic-only banks).
generate totdep_assets = (foreigndep + domdep) / assets
keep if !(totdep_assets < 0.65)

// 1.6. Minimum Size Requirement
// banksize = assets / 1,000,000 in 2021Q4 (labelled as billions of dollars),
// copied to every row of the bank; missing when the bank has no 2021Q4 row left
bysort rssdid: egen banksize = max(cond(yq == tq(2021q4), assets / 1000000, .))

// 1.7. Keep only banks with at least $1 billion in assets in 2021Q4
// Banks without a 2021Q4 size are removed as well
drop if banksize < 1 | missing(banksize)

// 1.8. Commercial Banks Only
//------------------------------------------------------------------------------
// Keep charter type 200 (commercial banks); rows with any other or a missing
// charter type are removed.
// Note: chartertype is imported from the legacy call reports on WRDS, whereas
// the main analysis uses the new version of the call reports. The legacy files
// stop in 2021q2, so the value is carried forward for banks that can be identified.
drop if chartertype != 200

//==============================================================================
// Section 2: Outlier Removal
//==============================================================================
// Removes individual bank-quarter observations whose values are likely due to
// reporting errors or extreme data points.

// Declare the panel again so the time-series operators in 2.2 can be used
xtset rssdid yq

// 2.1. Quarterly Average Assets Consistency
// An observation fails when quarterly average assets are at or outside the
// band from 0.5x to 1.5x end-of-quarter assets.
// NOTE(review): a missing qavgassets compares as larger than any number, so
// such rows fail this check too -- confirm that this is intended.
tempvar band_lo band_hi bad_qavg
generate `band_lo' = 0.5 * assets
generate `band_hi' = 1.5 * assets
generate byte `bad_qavg' = (qavgassets <= `band_lo') | (qavgassets >= `band_hi')

// 2.2. Asset Growth Outliers in 2022q4 (before SVB)
// Quarter-on-quarter asset growth of +/-50% or more, checked for 2022Q4 only.
// Growth is missing (never an outlier) when the previous quarter is absent.
sort rssdid yq
tempvar growth bad_growth
generate `growth' = S.assets / L.assets
generate byte `bad_growth' = !missing(`growth') & abs(`growth') >= 0.5 & yq == tq(2022q4)

// 2.3. Remove Flagged Observations
// Drop every observation that failed either check, then remove the helper
// variables so they are not written to the saved file
drop if `bad_qavg' | `bad_growth'
drop `band_lo' `band_hi' `bad_qavg' `growth' `bad_growth'

save "$path_clean/call_reports_forcostreg.dta", replace

//==============================================================================
// Section 3: Sample Anchoring in 2021Q4 and 2022Q4
//==============================================================================
// This section keeps only banks that still have an observation with non-missing
// assets in both 2021Q4 and 2022Q4 after the previous criteria have been imposed.
// Banks failing either quarter are removed with their whole history.
foreach anchor in 2021q4 2022q4 {
	tempvar present
	bysort rssdid: egen byte `present' = max(yq == tq(`anchor') & !missing(assets))
	keep if `present'
	drop `present'
}

// Save the list of bank IDs in the anchored sample (one row per rssdid, taken
// from the 2022Q4 cross-section); the panel in memory is left untouched
preserve
	keep if yq == tq(2022q4)
	keep rssdid
	duplicates drop
	save "$path_clean/sample_2022q4.dta", replace
restore

// Report completion and the size of the final sample in the log.
display "--- Part 1C completed ---" // Indicate the completion of the script

// The dollar sign is escaped: inside double quotes an unescaped $ starts a
// global macro reference, which would remove "$1B" from the message.
display "Sample restrictions applied: deposit-taking banks ≥\$1B, outliers removed"

// Count the banks explicitly: no earlier command in this script leaves a bank
// count in r(), so it is computed here by marking one row per rssdid.
tempvar first_row
bysort rssdid: generate byte `first_row' = (_n == 1)
quietly count if `first_row'
local n_banks = r(N)
drop `first_row'

// `=_N' is Stata's inline-expression syntax for the current number of observations
display "Sample size after restrictions: `=_N' observations for `n_banks' banks" // Display final sample size
